Stock Market Prediction using Long Short Term Memory (LSTM)

Import all the required libraries¶

In [1]:
import warnings
warnings.simplefilter("ignore")
In [2]:
import pandas as pd
import datetime as dt
from datetime import date
import matplotlib.pyplot as plt
import yfinance as yf
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_percentage_error
2023-12-28 23:40:00.790997: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-28 23:40:01.195570: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-28 23:40:01.195664: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-28 23:40:01.274693: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-28 23:40:01.434397: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-28 23:40:03.189963: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT

Define start day to fetch the dataset from the yahoo finance library¶


In [3]:
# I'm using fetching TCS (Tata Consultancy Services Limited) online Data from Yahoo Finance 
In [4]:
START = "2002-01-01"
TODAY = date.today().strftime("%Y-%m-%d")

# Define a function to load the dataset

def load_data(ticker):
    data = yf.download(ticker, START, TODAY)
    data.reset_index(inplace=True)
    return data
In [5]:
data = load_data('TCS.NS')
df = data
df1 = data
df.head()
[*********************100%%**********************]  1 of 1 completed
Out[5]:
Date Open High Low Close Adj Close Volume
0 2002-08-12 38.724998 40.000000 38.724998 39.700001 28.128601 212976
1 2002-08-13 39.750000 40.387501 38.875000 39.162498 27.747757 153576
2 2002-08-14 39.250000 39.250000 35.724998 36.462502 25.834730 822776
3 2002-08-15 36.462502 36.462502 36.462502 36.462502 25.834730 0
4 2002-08-16 36.275002 38.000000 35.750000 36.375000 25.772724 811856
In [6]:
rows, columns = df.shape
print("Total Rows in Dataset :",rows)
print("Total Columns in Dataset :",columns)
Total Rows in Dataset : 5309
Total Columns in Dataset : 7
In [7]:
df = df.drop(['Date', 'Adj Close'], axis = 1)
df.head()
Out[7]:
Open High Low Close Volume
0 38.724998 40.000000 38.724998 39.700001 212976
1 39.750000 40.387501 38.875000 39.162498 153576
2 39.250000 39.250000 35.724998 36.462502 822776
3 36.462502 36.462502 36.462502 36.462502 0
4 36.275002 38.000000 35.750000 36.375000 811856
In [8]:
df.tail()
Out[8]:
Open High Low Close Volume
5304 3827.250000 3898.800049 3766.550049 3780.050049 2586083
5305 3756.250000 3806.699951 3743.350098 3787.500000 1517562
5306 3800.000000 3845.949951 3762.000000 3824.000000 2413058
5307 3819.850098 3834.000000 3790.149902 3795.550049 1285231
5308 3799.000000 3818.199951 3768.000000 3811.199951 1293976
In [9]:
df.sample(5)
Out[9]:
Open High Low Close Volume
4300 2053.000000 2082.000000 2045.500000 2078.500000 3334301
3661 1147.000000 1149.375000 1134.000000 1136.574951 2540510
1912 415.000000 418.000000 411.149994 412.524994 1997806
1415 215.649994 216.524994 210.562500 211.537506 1996828
1371 252.762497 255.725006 250.574997 251.300003 2318868

Visualizing Closing Price¶

In [10]:
df[["Close"]].plot()
#Volume Plot
df[["Volume"]].plot()
Out[10]:
<Axes: >
No description has been provided for this image
No description has been provided for this image
In [11]:
import plotly.graph_objects as go

fig = go.Figure(data=go.Ohlc(x = df1['Date'],
        open = df1['Open'],
        high = df1['High'],
        low = df1['Low'],
        close = df1['Close']))
fig.show()
In [12]:
daily_close_px = df1[['Close']]
# Calculate the daily percentage change for `daily_close_px`
daily_pct_change = daily_close_px.pct_change()

# Plot the distributions
daily_pct_change.hist(bins=50, sharex=True, figsize=(12,8))

# Show the resulting plot
plt.show()
No description has been provided for this image
In [13]:
# Define the minumum of periods to consider 
min_periods = 75 

# Calculate the volatility
vol = daily_pct_change.rolling(min_periods).std() * np.sqrt(min_periods) 

# Plot the volatility
vol.plot(figsize=(10, 8))

# Show the plot
plt.show()
No description has been provided for this image
In [14]:
# Plot a scatter matrix with the `daily_pct_change` data 
pd.plotting.scatter_matrix(daily_pct_change, diagonal='kde', alpha=0.1,figsize=(12,12))

# Show the plot
plt.show()
No description has been provided for this image

SMA chart plotting for 20, 50, 100, 200, 300, 400, 500 day moving averages¶

In [15]:
df1['SMA20'] = df1.Close.rolling(20).mean()
df1['SMA50'] = df1.Close.rolling(50).mean()
df1['SMA100'] = df1.Close.rolling(100).mean()
df1['SMA200'] = df1.Close.rolling(200).mean()
df1['SMA300'] = df1.Close.rolling(300).mean()
df1['SMA400'] = df1.Close.rolling(400).mean()
df1['SMA500'] = df1.Close.rolling(500).mean()

fig = go.Figure(data=[go.Ohlc(x = df1['Date'],
                              open = df1['Open'],
                              high = df1['High'],
                              low = df1['Low'],
                              close = df1['Close'], name = "OHLC"),
                      go.Scatter(x = df1.Date, y = df1.SMA20, line=dict(color='orange', width=1), name="SMA20"),
                      go.Scatter(x = df1.Date, y = df1.SMA50, line=dict(color='green', width=1), name="SMA50"),
                      go.Scatter(x = df1.Date, y = df1.SMA100, line=dict(color='blue', width=1), name="SMA100"),
                      go.Scatter(x = df1.Date, y = df1.SMA200, line=dict(color='red', width=1), name="SMA200"),
                      go.Scatter(x = df1.Date, y = df1.SMA300, line=dict(color='pink', width=1), name="SMA300"),
                      go.Scatter(x = df1.Date, y = df1.SMA400, line=dict(color='violet', width=1), name="SMA400"),
                      go.Scatter(x = df1.Date, y = df1.SMA500, line=dict(color='purple', width=1), name="SMA500")])
fig.show()
In [16]:
df1['EMA20'] = df1.Close.ewm(span=20, adjust=False).mean()
df1['EMA50'] = df1.Close.ewm(span=50, adjust=False).mean()
df1['EMA100'] = df1.Close.ewm(span=100, adjust=False).mean()
df1['EMA200'] = df1.Close.ewm(span=200, adjust=False).mean()
df1['EMA300'] = df1.Close.ewm(span=300, adjust=False).mean()
df1['EMA400'] = df1.Close.ewm(span=400, adjust=False).mean()
df1['EMA500'] = df1.Close.ewm(span=500, adjust=False).mean()

fig = go.Figure(data = [go.Ohlc(x = df1['Date'],
                              open = df1['Open'],
                              high = df1['High'],
                              low = df1['Low'],
                              close = df1['Close'], name = "OHLC"),
                      go.Scatter(x = df1.Date, y = df1.SMA20, line=dict(color='orange', width=1), name="EMA20"),
                      go.Scatter(x = df1.Date, y = df1.SMA50, line=dict(color='green', width=1), name="EMA50"),
                      go.Scatter(x = df1.Date, y = df1.SMA100, line=dict(color='blue', width=1), name="EMA100"),
                      go.Scatter(x = df1.Date, y = df1.SMA200, line=dict(color='pink', width=1), name="EMA200"),
                      go.Scatter(x = df1.Date, y = df1.SMA300, line=dict(color='violet', width=1), name="EMA300"),
                      go.Scatter(x = df1.Date, y = df1.SMA400, line=dict(color='red', width=1), name="EMA400"),
                      go.Scatter(x = df1.Date, y = df1.SMA500, line=dict(color='purple', width=1), name="EMA500")])
fig.show()
In [17]:
plt.figure(figsize=(15, 10))
plt.plot(df['Close'])
plt.title("TCS Stock Price")
plt.xlabel("Date")
plt.ylabel("Price (USD)")
plt.grid(True)
plt.show()
No description has been provided for this image
In [18]:
df
Out[18]:
Open High Low Close Volume
0 38.724998 40.000000 38.724998 39.700001 212976
1 39.750000 40.387501 38.875000 39.162498 153576
2 39.250000 39.250000 35.724998 36.462502 822776
3 36.462502 36.462502 36.462502 36.462502 0
4 36.275002 38.000000 35.750000 36.375000 811856
... ... ... ... ... ...
5304 3827.250000 3898.800049 3766.550049 3780.050049 2586083
5305 3756.250000 3806.699951 3743.350098 3787.500000 1517562
5306 3800.000000 3845.949951 3762.000000 3824.000000 2413058
5307 3819.850098 3834.000000 3790.149902 3795.550049 1285231
5308 3799.000000 3818.199951 3768.000000 3811.199951 1293976

5309 rows × 5 columns

Plotting moving averages of 100 day¶


In [19]:
moving_average_100 = df.Close.rolling(200).mean()
In [20]:
moving_average_100
Out[20]:
0               NaN
1               NaN
2               NaN
3               NaN
4               NaN
           ...     
5304    3374.338247
5305    3376.616497
5306    3379.172246
5307    3381.221497
5308    3383.670247
Name: Close, Length: 5309, dtype: float64
In [21]:
moving_average_100.describe()
Out[21]:
count    5110.000000
mean     1133.830330
std      1063.894758
min        38.387501
25%       241.267747
50%       726.603124
75%      1758.078746
max      3640.561985
Name: Close, dtype: float64
In [22]:
plt.figure(figsize = (15,10))
plt.plot(df.Close)
plt.plot(moving_average_100, 'r')
plt.grid(True)
plt.title('Graph Of Moving Averages Of 100 Days')
Out[22]:
Text(0.5, 1.0, 'Graph Of Moving Averages Of 100 Days')
No description has been provided for this image

Defining 200 days moving averages and plotting comparision graph with 100 days moving averages¶


In [23]:
moving_average_200 = df.Close.rolling(200).mean()
In [24]:
moving_average_200
Out[24]:
0               NaN
1               NaN
2               NaN
3               NaN
4               NaN
           ...     
5304    3374.338247
5305    3376.616497
5306    3379.172246
5307    3381.221497
5308    3383.670247
Name: Close, Length: 5309, dtype: float64
In [25]:
moving_average_200.describe
Out[25]:
<bound method NDFrame.describe of 0               NaN
1               NaN
2               NaN
3               NaN
4               NaN
           ...     
5304    3374.338247
5305    3376.616497
5306    3379.172246
5307    3381.221497
5308    3383.670247
Name: Close, Length: 5309, dtype: float64>
In [26]:
plt.figure(figsize = (15,10))
plt.plot(df.Close)
plt.plot(moving_average_100, 'r')
plt.plot(moving_average_200, 'g')
plt.grid(True)
plt.title('Comparision Of 100 Days And 200 Days Moving Averages')
Out[26]:
Text(0.5, 1.0, 'Comparision Of 100 Days And 200 Days Moving Averages')
No description has been provided for this image
In [27]:
df.shape # i have dropped two Columns from loaded Dataset like Date and Adj Close
Out[27]:
(5309, 5)

Spliting the dataset into training (67%) and testing (33%) set¶

In [28]:
# Splitting data into training and testing 

train = pd.DataFrame(data[0:int(len(data)*0.67)])
test = pd.DataFrame(data[int(len(data)*0.67): int(len(data))])

print(train.shape)
print(test.shape)
(3557, 21)
(1752, 21)
In [29]:
train.head()
Out[29]:
Date Open High Low Close Adj Close Volume SMA20 SMA50 SMA100 ... SMA300 SMA400 SMA500 EMA20 EMA50 EMA100 EMA200 EMA300 EMA400 EMA500
0 2002-08-12 38.724998 40.000000 38.724998 39.700001 28.128601 212976 NaN NaN NaN ... NaN NaN NaN 39.700001 39.700001 39.700001 39.700001 39.700001 39.700001 39.700001
1 2002-08-13 39.750000 40.387501 38.875000 39.162498 27.747757 153576 NaN NaN NaN ... NaN NaN NaN 39.648810 39.678922 39.689357 39.694652 39.696429 39.697320 39.697855
2 2002-08-14 39.250000 39.250000 35.724998 36.462502 25.834730 822776 NaN NaN NaN ... NaN NaN NaN 39.345352 39.552788 39.625459 39.662492 39.674941 39.681186 39.684939
3 2002-08-15 36.462502 36.462502 36.462502 36.462502 25.834730 0 NaN NaN NaN ... NaN NaN NaN 39.070795 39.431600 39.562826 39.630651 39.653596 39.665133 39.672075
4 2002-08-16 36.275002 38.000000 35.750000 36.375000 25.772724 811856 NaN NaN NaN ... NaN NaN NaN 38.814053 39.311734 39.499701 39.598257 39.631812 39.648723 39.658913

5 rows × 21 columns

In [30]:
train.tail()
Out[30]:
Date Open High Low Close Adj Close Volume SMA20 SMA50 SMA100 ... SMA300 SMA400 SMA500 EMA20 EMA50 EMA100 EMA200 EMA300 EMA400 EMA500
3552 2016-11-21 1063.099976 1070.000000 1052.500000 1066.449951 924.593872 1793366 1136.617499 1171.703501 1225.988752 ... 1227.106751 1240.391626 1248.70965 1119.390066 1168.004245 1201.703014 1220.838312 1224.331149 1218.152192 1204.280186
3553 2016-11-22 1066.000000 1088.724976 1061.025024 1067.500000 925.504456 1318860 1129.276251 1168.213000 1223.437002 ... 1226.374001 1239.829126 1248.27500 1114.448155 1164.062902 1199.045529 1219.312558 1223.289082 1217.400809 1203.734157
3554 2016-11-23 1074.500000 1081.474976 1063.599976 1078.175049 934.759460 1275808 1122.492505 1165.371001 1221.349003 ... 1225.683168 1239.222814 1247.85065 1110.993573 1160.694751 1196.652054 1217.908205 1222.324869 1216.706417 1203.232923
3555 2016-11-24 1077.449951 1103.800049 1068.500000 1094.224976 948.674561 3026234 1117.242505 1164.034500 1219.812002 ... 1225.057168 1238.627689 1247.44735 1109.396564 1158.088093 1194.623795 1216.677526 1221.473707 1216.095536 1202.797762
3556 2016-11-25 1106.000000 1153.625000 1096.099976 1150.175049 997.182129 4276148 1114.845007 1163.513501 1218.994003 ... 1224.662085 1238.185939 1247.15925 1113.280229 1157.777778 1193.743622 1216.015810 1220.999961 1215.766756 1202.587691

5 rows × 21 columns

In [31]:
train.sample(5)
Out[31]:
Date Open High Low Close Adj Close Volume SMA20 SMA50 SMA100 ... SMA300 SMA400 SMA500 EMA20 EMA50 EMA100 EMA200 EMA300 EMA400 EMA500
2058 2010-10-21 487.024994 496.924988 485.000000 493.100006 382.249969 3752354 476.611249 453.750999 426.199500 ... 374.058417 325.090000 285.415475 476.459540 457.326237 435.225716 401.277129 371.995875 349.090566 331.519395
2120 2011-01-20 590.000000 609.375000 590.000000 606.099976 470.741516 4545208 576.311246 554.383000 515.760499 ... 428.262250 386.249500 337.570550 575.257013 554.692804 521.725570 471.512525 432.951904 403.022264 379.736735
153 2003-03-13 42.212502 42.724998 41.000000 41.924999 29.705076 390112 45.473750 49.991000 47.649375 ... NaN NaN NaN 45.267904 47.573099 47.105592 44.988859 43.690226 42.886022 42.346951
1170 2007-03-16 310.350006 314.462494 303.787506 309.412506 224.759613 4224284 310.409381 317.559504 300.423127 ... 253.930818 236.268464 221.501276 309.623874 309.904374 299.100152 276.952676 258.617347 242.530867 228.115224
3227 2015-07-22 1280.500000 1284.500000 1260.525024 1264.025024 1069.319336 1320628 1280.772498 1282.233499 1283.352251 ... 1252.931499 1211.223124 1162.453799 1277.082674 1279.024342 1278.566016 1261.820507 1227.520644 1185.933873 1142.886762

5 rows × 21 columns

In [32]:
test.head()
Out[32]:
Date Open High Low Close Adj Close Volume SMA20 SMA50 SMA100 ... SMA300 SMA400 SMA500 EMA20 EMA50 EMA100 EMA200 EMA300 EMA400 EMA500
3557 2016-11-28 1147.0 1159.724976 1125.949951 1141.300049 989.487671 1924296 1111.457507 1162.749001 1217.892504 ... 1224.146752 1237.719814 1246.83540 1115.948783 1157.131592 1192.705135 1215.272369 1220.470394 1215.395351 1202.343030
3558 2016-11-29 1137.5 1146.000000 1126.275024 1129.925049 979.625610 1442296 1107.986261 1162.063003 1216.426254 ... 1223.570502 1237.259939 1246.47695 1117.279856 1156.064669 1191.461965 1214.423142 1219.868764 1214.969065 1202.053936
3559 2016-11-30 1132.5 1147.375000 1121.150024 1138.025024 986.648376 3445832 1106.137512 1161.554004 1215.302253 ... 1223.118169 1236.876501 1246.16715 1119.255587 1155.357232 1190.403808 1213.662962 1219.324951 1214.585304 1201.798332
3560 2016-12-01 1138.5 1145.000000 1127.025024 1131.724976 981.185974 1123052 1105.122510 1160.571504 1214.148503 ... 1222.657668 1236.612251 1245.85685 1120.443148 1154.430477 1189.241851 1212.847658 1218.742892 1214.172036 1201.518598
3561 2016-12-02 1129.0 1135.400024 1102.599976 1110.925049 963.153076 1998032 1102.682513 1158.675505 1212.831254 ... 1222.126919 1236.353564 1245.47325 1119.536662 1152.724382 1187.691023 1211.833503 1218.026494 1213.657088 1201.156947

5 rows × 21 columns

In [33]:
test.tail()
Out[33]:
Date Open High Low Close Adj Close Volume SMA20 SMA50 SMA100 ... SMA300 SMA400 SMA500 EMA20 EMA50 EMA100 EMA200 EMA300 EMA400 EMA500
5304 2023-12-20 3827.250000 3898.800049 3766.550049 3780.050049 3780.050049 2586083 3613.032495 3511.515000 3491.664500 ... 3354.508665 3323.052623 3395.608696 3644.695999 3552.512366 3491.774595 3428.860636 3396.828466 3365.839114 3326.522304
5305 2023-12-21 3756.250000 3806.699951 3743.350098 3787.500000 3787.500000 1517562 3625.900000 3514.497998 3495.656499 ... 3356.829832 3323.999748 3396.042996 3658.296380 3561.727567 3497.630543 3432.429187 3399.424290 3367.942161 3328.362535
5306 2023-12-22 3800.000000 3845.949951 3762.000000 3824.000000 3824.000000 2413058 3641.687500 3518.400000 3499.927500 ... 3359.236665 3325.022498 3396.527795 3674.077677 3572.012761 3504.093305 3436.325414 3402.245391 3370.216764 3330.341127
5307 2023-12-26 3819.850098 3834.000000 3790.149902 3795.550049 3795.550049 1285231 3658.609998 3522.113003 3504.329001 ... 3361.672166 3326.069373 3396.950195 3685.646474 3580.778929 3509.864726 3439.899789 3404.858711 3372.338127 3332.198249
5308 2023-12-27 3799.000000 3818.199951 3768.000000 3811.199951 3811.199951 1293976 3675.662500 3527.486001 3508.226501 ... 3363.980999 3326.968373 3397.458795 3697.603948 3589.815047 3515.831760 3443.594318 3407.558653 3374.526964 3334.110431

5 rows × 21 columns

In [34]:
test.sample(5)
Out[34]:
Date Open High Low Close Adj Close Volume SMA20 SMA50 SMA100 ... SMA300 SMA400 SMA500 EMA20 EMA50 EMA100 EMA200 EMA300 EMA400 EMA500
4476 2020-08-19 2276.350098 2284.600098 2252.500000 2256.600098 2122.070068 2843005 2257.264990 2183.912983 2039.275493 ... 2101.543334 2083.503378 2064.349501 2250.551974 2187.660364 2121.233141 2085.679021 2062.332345 2029.047163 1989.160935
5113 2023-03-10 3312.899902 3337.250000 3290.000000 3331.000000 3289.323242 1024404 3418.905005 3392.502007 3337.480002 ... 3401.662828 3455.539366 3399.470695 3390.017757 3385.112685 3350.396423 3340.311736 3329.741484 3292.794901 3237.066875
5188 2023-07-03 3314.300049 3318.800049 3268.750000 3272.300049 3255.389404 1687264 3234.237488 3245.025996 3274.064500 ... 3274.502331 3378.814495 3409.514093 3239.866411 3245.884893 3258.452357 3280.462936 3288.496339 3271.249361 3233.491362
5198 2023-07-17 3510.000000 3549.899902 3477.050049 3491.699951 3473.655762 2743228 3287.107483 3274.313994 3257.640493 ... 3267.279330 3374.322244 3410.347893 3315.407416 3279.974189 3274.353768 3286.573383 3292.087641 3274.794381 3237.816269
5302 2023-12-18 3858.100098 3929.000000 3830.149902 3859.199951 3859.199951 2521612 3584.709998 3503.805000 3483.640999 ... 3349.152831 3321.188873 3394.883096 3610.895388 3532.083283 3479.279149 3421.402764 3391.442804 3361.495014 3322.734636

5 rows × 21 columns

Using MinMax scaler for normalization of the dataset¶


In [35]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
In [36]:
train_close = train.iloc[:, 4:5].values
test_close = test.iloc[:, 4:5].values
In [37]:
data_training_array = scaler.fit_transform(train_close)
data_training_array
Out[37]:
array([[3.12378868e-03],
       [2.72638213e-03],
       [7.30118150e-04],
       ...,
       [7.70928485e-01],
       [7.82795125e-01],
       [8.24162251e-01]])
In [38]:
x_train = []
y_train = [] 

for i in range(100, data_training_array.shape[0]):
    x_train.append(data_training_array[i-100: i])
    y_train.append(data_training_array[i, 0])

x_train, y_train = np.array(x_train), np.array(y_train) 
In [39]:
x_train.shape
Out[39]:
(3457, 100, 1)

ML Model (LSTM)¶


In [40]:
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential
In [41]:
model = Sequential()
model.add(LSTM(units = 50, activation = 'relu', return_sequences=True
              ,input_shape = (x_train.shape[1], 1)))
model.add(Dropout(0.2))


model.add(LSTM(units = 60, activation = 'relu', return_sequences=True))
model.add(Dropout(0.3))


model.add(LSTM(units = 80, activation = 'relu', return_sequences=True))
model.add(Dropout(0.4))


model.add(LSTM(units = 120, activation = 'relu'))
model.add(Dropout(0.5))

model.add(Dense(units = 1))
In [42]:
model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm (LSTM)                 (None, 100, 50)           10400     
                                                                 
 dropout (Dropout)           (None, 100, 50)           0         
                                                                 
 lstm_1 (LSTM)               (None, 100, 60)           26640     
                                                                 
 dropout_1 (Dropout)         (None, 100, 60)           0         
                                                                 
 lstm_2 (LSTM)               (None, 100, 80)           45120     
                                                                 
 dropout_2 (Dropout)         (None, 100, 80)           0         
                                                                 
 lstm_3 (LSTM)               (None, 120)               96480     
                                                                 
 dropout_3 (Dropout)         (None, 120)               0         
                                                                 
 dense (Dense)               (None, 1)                 121       
                                                                 
=================================================================
Total params: 178761 (698.29 KB)
Trainable params: 178761 (698.29 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________

Training the model¶


In [43]:
import tensorflow as tf
model.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics=[tf.keras.metrics.MeanAbsoluteError()])
model.fit(x_train, y_train,epochs = 100)
Epoch 1/100
109/109 [==============================] - 34s 258ms/step - loss: 0.0254 - mean_absolute_error: 0.0955
Epoch 2/100
109/109 [==============================] - 30s 272ms/step - loss: 0.0098 - mean_absolute_error: 0.0669
Epoch 3/100
109/109 [==============================] - 30s 274ms/step - loss: 0.0058 - mean_absolute_error: 0.0497
Epoch 4/100
109/109 [==============================] - 28s 255ms/step - loss: 0.0063 - mean_absolute_error: 0.0511
Epoch 5/100
109/109 [==============================] - 38s 349ms/step - loss: 0.0048 - mean_absolute_error: 0.0440
Epoch 6/100
109/109 [==============================] - 26s 241ms/step - loss: 0.0046 - mean_absolute_error: 0.0435
Epoch 7/100
109/109 [==============================] - 27s 252ms/step - loss: 0.0048 - mean_absolute_error: 0.0443
Epoch 8/100
109/109 [==============================] - 26s 242ms/step - loss: 0.0040 - mean_absolute_error: 0.0412
Epoch 9/100
109/109 [==============================] - 28s 259ms/step - loss: 0.0038 - mean_absolute_error: 0.0398
Epoch 10/100
109/109 [==============================] - 26s 241ms/step - loss: 0.0042 - mean_absolute_error: 0.0403
Epoch 11/100
109/109 [==============================] - 36s 329ms/step - loss: 0.0033 - mean_absolute_error: 0.0366
Epoch 12/100
109/109 [==============================] - 42s 380ms/step - loss: 0.0039 - mean_absolute_error: 0.0411
Epoch 13/100
109/109 [==============================] - 35s 319ms/step - loss: 0.0033 - mean_absolute_error: 0.0364
Epoch 14/100
109/109 [==============================] - 35s 324ms/step - loss: 0.0033 - mean_absolute_error: 0.0370
Epoch 15/100
109/109 [==============================] - 42s 382ms/step - loss: 0.0032 - mean_absolute_error: 0.0364
Epoch 16/100
109/109 [==============================] - 30s 269ms/step - loss: 0.0030 - mean_absolute_error: 0.0359
Epoch 17/100
109/109 [==============================] - 35s 318ms/step - loss: 0.0033 - mean_absolute_error: 0.0381
Epoch 18/100
109/109 [==============================] - 34s 311ms/step - loss: 0.0029 - mean_absolute_error: 0.0357
Epoch 19/100
109/109 [==============================] - 32s 296ms/step - loss: 0.0028 - mean_absolute_error: 0.0355
Epoch 20/100
109/109 [==============================] - 38s 352ms/step - loss: 0.0029 - mean_absolute_error: 0.0364
Epoch 21/100
109/109 [==============================] - 29s 264ms/step - loss: 0.0030 - mean_absolute_error: 0.0371
Epoch 22/100
109/109 [==============================] - 35s 320ms/step - loss: 0.0028 - mean_absolute_error: 0.0358
Epoch 23/100
109/109 [==============================] - 40s 366ms/step - loss: 0.0029 - mean_absolute_error: 0.0360
Epoch 24/100
109/109 [==============================] - 33s 297ms/step - loss: 0.0027 - mean_absolute_error: 0.0359
Epoch 25/100
109/109 [==============================] - 33s 298ms/step - loss: 0.0029 - mean_absolute_error: 0.0370
Epoch 26/100
109/109 [==============================] - 35s 324ms/step - loss: 0.0028 - mean_absolute_error: 0.0370
Epoch 27/100
109/109 [==============================] - 40s 367ms/step - loss: 0.0029 - mean_absolute_error: 0.0370
Epoch 28/100
109/109 [==============================] - 31s 281ms/step - loss: 0.0027 - mean_absolute_error: 0.0359
Epoch 29/100
109/109 [==============================] - 32s 295ms/step - loss: 0.0026 - mean_absolute_error: 0.0358
Epoch 30/100
109/109 [==============================] - 39s 358ms/step - loss: 0.0027 - mean_absolute_error: 0.0370
Epoch 31/100
109/109 [==============================] - 31s 283ms/step - loss: 0.0027 - mean_absolute_error: 0.0362
Epoch 32/100
109/109 [==============================] - 30s 274ms/step - loss: 0.0025 - mean_absolute_error: 0.0353
Epoch 33/100
109/109 [==============================] - 30s 276ms/step - loss: 0.0026 - mean_absolute_error: 0.0350
Epoch 34/100
109/109 [==============================] - 32s 294ms/step - loss: 0.0025 - mean_absolute_error: 0.0354
Epoch 35/100
109/109 [==============================] - 44s 401ms/step - loss: 0.0024 - mean_absolute_error: 0.0346
Epoch 36/100
109/109 [==============================] - 42s 382ms/step - loss: 0.0024 - mean_absolute_error: 0.0346
Epoch 37/100
109/109 [==============================] - 43s 393ms/step - loss: 0.0025 - mean_absolute_error: 0.0348
Epoch 38/100
109/109 [==============================] - 43s 393ms/step - loss: 0.0025 - mean_absolute_error: 0.0350
Epoch 39/100
109/109 [==============================] - 43s 392ms/step - loss: 0.0026 - mean_absolute_error: 0.0358
Epoch 40/100
109/109 [==============================] - 41s 379ms/step - loss: 0.0025 - mean_absolute_error: 0.0355
Epoch 41/100
109/109 [==============================] - 41s 378ms/step - loss: 0.0026 - mean_absolute_error: 0.0367
Epoch 42/100
109/109 [==============================] - 41s 380ms/step - loss: 0.0026 - mean_absolute_error: 0.0360
Epoch 43/100
109/109 [==============================] - 40s 366ms/step - loss: 0.0029 - mean_absolute_error: 0.0383
Epoch 44/100
109/109 [==============================] - 41s 373ms/step - loss: 0.0027 - mean_absolute_error: 0.0368
Epoch 45/100
109/109 [==============================] - 41s 372ms/step - loss: 0.0027 - mean_absolute_error: 0.0366
Epoch 46/100
109/109 [==============================] - 37s 339ms/step - loss: 0.0026 - mean_absolute_error: 0.0369
Epoch 47/100
109/109 [==============================] - 29s 264ms/step - loss: 0.0026 - mean_absolute_error: 0.0370
Epoch 48/100
109/109 [==============================] - 38s 346ms/step - loss: 0.0026 - mean_absolute_error: 0.0367
Epoch 49/100
109/109 [==============================] - 42s 387ms/step - loss: 0.0023 - mean_absolute_error: 0.0344
Epoch 50/100
109/109 [==============================] - 37s 340ms/step - loss: 0.0024 - mean_absolute_error: 0.0355
Epoch 51/100
109/109 [==============================] - 37s 342ms/step - loss: 0.0025 - mean_absolute_error: 0.0361
Epoch 52/100
109/109 [==============================] - 34s 316ms/step - loss: 0.0025 - mean_absolute_error: 0.0352
Epoch 53/100
109/109 [==============================] - 33s 304ms/step - loss: 0.0025 - mean_absolute_error: 0.0356
Epoch 54/100
109/109 [==============================] - 33s 303ms/step - loss: 0.0029 - mean_absolute_error: 0.0383
Epoch 55/100
109/109 [==============================] - 35s 322ms/step - loss: 0.0026 - mean_absolute_error: 0.0361
Epoch 56/100
109/109 [==============================] - 33s 303ms/step - loss: 0.0023 - mean_absolute_error: 0.0352
Epoch 57/100
109/109 [==============================] - 34s 309ms/step - loss: 0.0024 - mean_absolute_error: 0.0354
Epoch 58/100
109/109 [==============================] - 31s 289ms/step - loss: 0.0023 - mean_absolute_error: 0.0346
Epoch 59/100
109/109 [==============================] - 38s 344ms/step - loss: 0.0024 - mean_absolute_error: 0.0356
Epoch 60/100
109/109 [==============================] - 33s 306ms/step - loss: 0.0024 - mean_absolute_error: 0.0347
Epoch 61/100
109/109 [==============================] - 36s 329ms/step - loss: 0.0024 - mean_absolute_error: 0.0354
Epoch 62/100
109/109 [==============================] - 35s 320ms/step - loss: 0.0025 - mean_absolute_error: 0.0368
Epoch 63/100
109/109 [==============================] - 34s 310ms/step - loss: 0.0024 - mean_absolute_error: 0.0353
Epoch 64/100
109/109 [==============================] - 35s 321ms/step - loss: 0.0025 - mean_absolute_error: 0.0356
Epoch 65/100
109/109 [==============================] - 35s 325ms/step - loss: 0.0023 - mean_absolute_error: 0.0349
Epoch 66/100
109/109 [==============================] - 34s 316ms/step - loss: 0.0026 - mean_absolute_error: 0.0355
Epoch 67/100
109/109 [==============================] - 33s 301ms/step - loss: 0.0025 - mean_absolute_error: 0.0360
Epoch 68/100
109/109 [==============================] - 35s 322ms/step - loss: 0.0023 - mean_absolute_error: 0.0344
Epoch 69/100
109/109 [==============================] - 39s 358ms/step - loss: 0.0024 - mean_absolute_error: 0.0346
Epoch 70/100
109/109 [==============================] - 40s 368ms/step - loss: 0.0023 - mean_absolute_error: 0.0343
Epoch 71/100
109/109 [==============================] - 36s 333ms/step - loss: 0.0030 - mean_absolute_error: 0.0382
Epoch 72/100
109/109 [==============================] - 33s 301ms/step - loss: 0.0025 - mean_absolute_error: 0.0363
Epoch 73/100
109/109 [==============================] - 33s 305ms/step - loss: 0.0025 - mean_absolute_error: 0.0361
Epoch 74/100
109/109 [==============================] - 31s 287ms/step - loss: 0.0023 - mean_absolute_error: 0.0347
Epoch 75/100
109/109 [==============================] - 31s 286ms/step - loss: 0.0025 - mean_absolute_error: 0.0357
Epoch 76/100
109/109 [==============================] - 32s 295ms/step - loss: 0.0024 - mean_absolute_error: 0.0350
Epoch 77/100
109/109 [==============================] - 32s 291ms/step - loss: 0.0023 - mean_absolute_error: 0.0338
Epoch 78/100
109/109 [==============================] - 31s 282ms/step - loss: 0.0024 - mean_absolute_error: 0.0351
Epoch 79/100
109/109 [==============================] - 31s 283ms/step - loss: 0.0024 - mean_absolute_error: 0.0361
Epoch 80/100
109/109 [==============================] - 32s 294ms/step - loss: 0.0022 - mean_absolute_error: 0.0343
Epoch 81/100
109/109 [==============================] - 32s 296ms/step - loss: 0.0040 - mean_absolute_error: 0.0386
Epoch 82/100
109/109 [==============================] - 31s 284ms/step - loss: 0.0027 - mean_absolute_error: 0.0361
Epoch 83/100
109/109 [==============================] - 31s 286ms/step - loss: 0.0024 - mean_absolute_error: 0.0348
Epoch 84/100
109/109 [==============================] - 34s 314ms/step - loss: 0.0030 - mean_absolute_error: 0.0386
Epoch 85/100
109/109 [==============================] - 32s 296ms/step - loss: 0.0025 - mean_absolute_error: 0.0351
Epoch 86/100
109/109 [==============================] - 31s 285ms/step - loss: 0.0025 - mean_absolute_error: 0.0356
Epoch 87/100
109/109 [==============================] - 29s 269ms/step - loss: 0.0025 - mean_absolute_error: 0.0351
Epoch 88/100
109/109 [==============================] - 27s 249ms/step - loss: 0.0025 - mean_absolute_error: 0.0362
Epoch 89/100
109/109 [==============================] - 26s 236ms/step - loss: 0.0024 - mean_absolute_error: 0.0352
Epoch 90/100
109/109 [==============================] - 27s 248ms/step - loss: 0.0025 - mean_absolute_error: 0.0363
Epoch 91/100
109/109 [==============================] - 26s 238ms/step - loss: 0.0025 - mean_absolute_error: 0.0358
Epoch 92/100
109/109 [==============================] - 26s 236ms/step - loss: 0.0025 - mean_absolute_error: 0.0357
Epoch 93/100
109/109 [==============================] - 27s 246ms/step - loss: 0.0029 - mean_absolute_error: 0.0383
Epoch 94/100
109/109 [==============================] - 26s 235ms/step - loss: 0.0024 - mean_absolute_error: 0.0359
Epoch 95/100
109/109 [==============================] - 27s 249ms/step - loss: 0.0024 - mean_absolute_error: 0.0351
Epoch 96/100
109/109 [==============================] - 27s 248ms/step - loss: 0.0024 - mean_absolute_error: 0.0352
Epoch 97/100
109/109 [==============================] - 36s 331ms/step - loss: 0.0023 - mean_absolute_error: 0.0347
Epoch 98/100
109/109 [==============================] - 33s 304ms/step - loss: 0.0024 - mean_absolute_error: 0.0352
Epoch 99/100
109/109 [==============================] - 36s 328ms/step - loss: 0.0022 - mean_absolute_error: 0.0339
Epoch 100/100
109/109 [==============================] - 34s 308ms/step - loss: 0.0023 - mean_absolute_error: 0.0346
Out[43]:
<keras.src.callbacks.History at 0x7f9356a5dd50>
In [44]:
model.save('keras_model.h5')
In [45]:
test_close.shape
Out[45]:
(1752, 1)
In [46]:
past_100_days = pd.DataFrame(train_close[-100:])
In [47]:
test_df = pd.DataFrame(test_close)

Defining the final dataset for testing by including last 100 coloums of the training dataset to get the prediction from the 1st column of the testing dataset.


In [48]:
final_df = pd.concat([past_100_days, test_df], ignore_index=True)
In [49]:
final_df.head()
Out[49]:
0
0 1251.449951
1 1276.550049
2 1250.425049
3 1247.099976
4 1242.650024
In [50]:
input_data = scaler.fit_transform(final_df)
input_data
Out[50]:
array([[0.06766715],
       [0.07612242],
       [0.0673219 ],
       ...,
       [0.93426142],
       [0.92467771],
       [0.92994957]])
In [51]:
input_data.shape
Out[51]:
(1852, 1)

Testing the model¶


In [52]:
x_test = []
y_test = []
for i in range(100, input_data.shape[0]):
   x_test.append(input_data[i-100: i])
   y_test.append(input_data[i, 0])
In [53]:
x_test, y_test = np.array(x_test), np.array(y_test)
print(x_test.shape)
print(y_test.shape)
(1752, 100, 1)
(1752,)

Making prediction and plotting the graph of predicted vs actual values¶


In [54]:
# Making predictions

y_pred = model.predict(x_test)
55/55 [==============================] - 5s 86ms/step
In [55]:
y_pred.shape
Out[55]:
(1752, 1)
In [56]:
y_test
Out[56]:
array([0.03056183, 0.02673003, 0.0294586 , ..., 0.93426142, 0.92467771,
       0.92994957])
In [57]:
y_pred
Out[57]:
array([[0.11212508],
       [0.11159423],
       [0.11180729],
       ...,
       [0.8013097 ],
       [0.8074249 ],
       [0.8124113 ]], dtype=float32)
In [58]:
scaler.scale_
Out[58]:
array([0.00033686])
In [59]:
scale_factor = 1/0.00041967
y_pred = y_pred * scale_factor
y_test = y_test * scale_factor
In [60]:
plt.figure(figsize = (12,6))
plt.plot(y_test, 'b', label = "Original Price")
plt.plot(y_pred, 'r', label = "Predicted Price")
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()
No description has been provided for this image

Model evaluation¶

Calculation of mean absolute error

In [61]:
from sklearn.metrics import mean_absolute_error

mae = mean_absolute_error(y_test, y_pred)
mae_percentage = (mae / np.mean(y_test)) * 100
print("Mean absolute error on test set: {:.2f}%".format(mae_percentage))
Mean absolute error on test set: 14.20%

Calculation of R2 score

In [62]:
from sklearn.metrics import r2_score

# Actual values
actual = y_test

# Predicted values
predicted = y_pred

# Calculate the R2 score
r2 = r2_score(actual, predicted)

print("R2 score:", r2)
R2 score: 0.9272452174290756
In [63]:
# Plotting the R2 score
fig, ax = plt.subplots()
ax.barh(0, r2, color='skyblue')
ax.set_xlim([-1, 1])
ax.set_yticks([])
ax.set_xlabel('R2 Score')
ax.set_title('R2 Score')

# Adding the R2 score value on the bar
ax.text(r2, 0, f'{r2:.2f}', va='center', color='black')

plt.show()
No description has been provided for this image
In [64]:
plt.scatter(actual, predicted)
plt.plot([min(actual), max(actual)], [min(predicted), ma<b><x(predicted)], 'r--')
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title(f'R2 Score: {r2:.2f}')
plt.show()
No description has been provided for this image
In [65]:
print("The Error Rate of Prediction Model:",(mean_absolute_percentage_error(y_pred,y_test))*100)
print("The Accuracy of Prediction Model:",(1-mean_absolute_percentage_error(y_pred,y_test))*100)
The Error Rate of Prediction Model: 20.53598044414941
The Accuracy of Prediction Model: 79.46401955585058
In [66]:
plt.plot(y_test,color='red',label='original test data')
plt.plot(y_pred,color='green',label='predicted data')
plt.xlabel('time')
plt.ylabel('google stock price')
plt.legend()
plt.show()
No description has been provided for this image
In [ ]:
 
In [ ]: